library(data.table)
library(viridis)
Loading required package: viridisLite
library(ggplot2)
library(reshape2)

Attaching package: ‘reshape2’

The following objects are masked from ‘package:data.table’:

    dcast, melt
library(geosphere)
library(vegan)
library(raster)
Loading required package: sp

Attaching package: ‘raster’

The following object is masked from ‘package:data.table’:

    shift
#load spring and fall data
dat_NEUS_grid.spring.reduced_3plustows <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/dat_NEUS_grid.spring.reduced_3plustows.RData")

dat_NEUS_grid.fall.reduced_3plustows <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/dat_NEUS_grid.fall.reduced_3plustows.RData")

#distance among grid cells
neus_reg_distances_fall.l <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/neus_reg_distances_fall.l.RData")

neus_reg_distances_spring.l <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/neus_reg_distances_spring.l.RData")

Lists of Years

#spring
dat_NEUS_grid.spring.reduced_3plustows[,year:= as.numeric(year)] #make numeric
setorder(dat_NEUS_grid.spring.reduced_3plustows, year)

neus_years_spring <- unique(dat_NEUS_grid.spring.reduced_3plustows[,year])

#fall
dat_NEUS_grid.fall.reduced_3plustows[,year:= as.numeric(year)] #make numeric
setorder(dat_NEUS_grid.fall.reduced_3plustows, year)

neus_years_fall <- unique(dat_NEUS_grid.fall.reduced_3plustows[,year])

Haul ID keys

neus_spring_haulids <- unique(dat_NEUS_grid.spring.reduced_3plustows[,haulid])
neus_spring_haulids_key <- data.table(haulid = neus_spring_haulids, key_ID = seq(1,length(neus_spring_haulids), by = 1))

neus_fall_haulids <- unique(dat_NEUS_grid.fall.reduced_3plustows[,haulid])
neus_fall_haulids_key <- data.table(haulid = neus_fall_haulids, key_ID = seq(1,length(neus_fall_haulids), by = 1))

Convert haulids to numeric key_IDs

#spring
dat_NEUS_grid.spring.reduced_3plustows <- dat_NEUS_grid.spring.reduced_3plustows[neus_spring_haulids_key, on = "haulid"]

#fall
dat_NEUS_grid.fall.reduced_3plustows <- dat_NEUS_grid.fall.reduced_3plustows[neus_fall_haulids_key, on = "haulid"]

Dissimilarities across multiple years SPRING

neus_spring_distances_dissimilarities_allyears <- data.table("haulid1" = integer(), "haulid2" = integer(), "distance" = numeric(),"bray_curtis_dissimilarity" = numeric(), year = integer(), "jaccard_dissimilarity" = numeric())

#Now loop through all years
for (i in 1:length(neus_years_spring)) {
  reduced_year <- dat_NEUS_grid.spring.reduced_3plustows[year == neus_years_spring[i],]
  
  #distances among cells
  setorder(reduced_year, key_ID)
  
  lat_lon_haulid <- unique(reduced_year[,.(lat,lon,key_ID)])
  neus_distances <- distm(lat_lon_haulid[,.(lon,lat)])
  key_IDs <- lat_lon_haulid[,key_ID]

  colnames(neus_distances) <- rownames(neus_distances) <- key_IDs

  #wide to long
  haulid_distances.l <- reshape2::melt(neus_distances,varnames = (c("haulid1", "haulid2")), value.name = "distance")
  
  #make into data table
  haulid_distances.l <- data.table(haulid_distances.l)

  
  reduced_year_wide <- dcast(reduced_year, key_ID + year ~ matched_name2, value.var = "wtcpue", fun.aggregate = sum) #long to wide data for community matrix, column names are cell then species
  
  ncols <- ncol(reduced_year_wide)
  communitymatrix <- cbind(reduced_year_wide[,3:ncols], reduced_year_wide[,1:2]) #community matrix with year and cell on far right

  #list of haulid keys
  key_IDs_subset <- communitymatrix$key_ID

  dissimilarities_abundance <- vegdist(communitymatrix[,1:(ncols-2)], method = "bray", binary = F) #dissimilarity 
  dissimilarities_occurrence <- vegdist(communitymatrix[,1:(ncols-2)], method = "jaccard", binary = T) #T binary performs presence absence standardization before using decostand

  #make into matrix
  dissimilarities_abundance.m <- as.matrix(dissimilarities_abundance, labels=TRUE)
  dissimilarities_occurrence.m <- as.matrix(dissimilarities_occurrence, labels=TRUE)
  colnames(dissimilarities_abundance.m) <- rownames(dissimilarities_abundance.m) <- key_IDs_subset
  colnames(dissimilarities_occurrence.m) <- rownames(dissimilarities_occurrence.m) <- key_IDs_subset

  #reshape dissimilarities
  dissimilarities_abundance.l <- reshape2::melt(dissimilarities_abundance.m, varnames = c("haulid1", "haulid2"), value.name = "bray_curtis_dissimilarity")
  dissimilarities_occurrence.l <- reshape2::melt(dissimilarities_occurrence.m, varnames = c("haulid1", "haulid2"), value.name = "jaccard_dissimilarity")
  dissimilarities_abundance.l <- data.table(dissimilarities_abundance.l) #and then to data table
  dissimilarities_occurrence.l <- data.table(dissimilarities_occurrence.l)

  #add year for these values
  dissimilarities_abundance.l[, "year" := neus_years_spring[i]]
  dissimilarities_occurrence.l[, "year" := neus_years_spring[i]]

  #merge distance with dissimilarity for this year with both metrics of dissimilarity
  dissimilarities_full <- haulid_distances.l[dissimilarities_abundance.l, on = c("haulid1", "haulid2")]
  dissimilarities_full <- dissimilarities_full[dissimilarities_occurrence.l, on = c("haulid1", "haulid2", "year")]


  #add to data table
  neus_spring_distances_dissimilarities_allyears <- rbind(neus_spring_distances_dissimilarities_allyears, dissimilarities_full)
  
}
you have empty rows: their dissimilarities may be meaningless in method “bray”you have empty rows: their dissimilarities may be meaningless in method “jaccard”you have empty rows: their dissimilarities may be meaningless in method “bray”you have empty rows: their dissimilarities may be meaningless in method “jaccard”
summary(neus_spring_distances_dissimilarities_allyears) #here we have bray, jaccard and geographic distance
    haulid1         haulid2         distance       bray_curtis_dissimilarity      year     
 Min.   :    1   Min.   :    1   Min.   :      0   Min.   :0.0000            Min.   :1973  
 1st Qu.: 2765   1st Qu.: 2765   1st Qu.: 187365   1st Qu.:0.8404            1st Qu.:1984  
 Median : 6050   Median : 6050   Median : 330150   Median :0.9499            Median :1998  
 Mean   : 5949   Mean   : 5949   Mean   : 385001   Mean   :0.8818            Mean   :1997  
 3rd Qu.: 9137   3rd Qu.: 9137   3rd Qu.: 554298   3rd Qu.:0.9910            3rd Qu.:2010  
 Max.   :11686   Max.   :11686   Max.   :1206210   Max.   :1.0000            Max.   :2019  
 jaccard_dissimilarity
 Min.   :0.0000       
 1st Qu.:0.7059       
 Median :0.8214       
 Mean   :0.7924       
 3rd Qu.:0.9091       
 Max.   :1.0000       
#delete repeats
neus_spring_distances_dissimilarities_allyears <- neus_spring_distances_dissimilarities_allyears[haulid1 >= haulid2,] #3165272 to 1588479 rows


neus_spring_distances_dissimilarities_allyears[,bray_curtis_similarity := (1-bray_curtis_dissimilarity)][,jaccard_similarity := (1-jaccard_dissimilarity)]

saveRDS(neus_spring_distances_dissimilarities_allyears, file = "neus_spring_distances_dissimilarities_allyears.rds")

#Heat map

Subsample for plotting

#jaccard only
neus_spring_distances_dissimilarities_allyears_jaccard <- neus_spring_distances_dissimilarities_allyears[,.(year,distance,jaccard_similarity)]

#new column with rounded distance
neus_spring_distances_dissimilarities_allyears_jaccard[,distance_10s := round(distance,-1)] #round to nearest 100

#new column with median jaccard similarity
neus_spring_distances_dissimilarities_allyears_jaccard[, jaccard_similarity_mean := mean(jaccard_similarity), .(year, distance_10s)]

#reduce to unique values
neus_spring_distances_dissimilarities_allyears_jaccard_rounded <- unique(neus_spring_distances_dissimilarities_allyears_jaccard, by = c("year","distance_10s"))

All possible combos

#combine
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos <- neus_spring_distances_dissimilarities_allyears_jaccard_allcombos[neus_spring_distances_dissimilarities_allyears_jaccard_rounded, by = c("year","distance_10s")]
Ignoring by= because j= is not suppliedError in `[.data.table`(neus_spring_distances_dissimilarities_allyears_jaccard_allcombos,  : 
  When i is a data.table (or character vector), the columns to join by must be specified using 'on=' argument (see ?data.table), by keying x (i.e. sorted, and, marked as sorted, see ?setkey), or by sharing column names between x and i (i.e., a natural join). Keyed joins might have further speed benefits on very large data due to x being sorted in RAM.

Trying to use ggplot

Convert to matrix

Convert comprehensive table to matrix

neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide <- data.table::dcast(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos[,.(year,distance_10s,jaccard_similarity_mean)], distance_10s ~ year, fill = NA)
Using 'jaccard_similarity_mean' as value column. Use 'value.var' to override
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide <- data.table::dcast(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos[,.(year,distance_10s,jaccard_similarity_mean)], distance_10s ~ year, fill = NA)
Using 'jaccard_similarity_mean' as value column. Use 'value.var' to override
setkey(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide, distance_10s)
#get rid of first column
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m <- neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide[,2:ncol(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide)]
#get rid of first column
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m <- neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide[,2:ncol(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide)]
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m <- as.matrix(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m)
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.r <- as.raster (neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m)

Plot raster

image(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.r)
Error in image.default(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.r) : 
  'z' must be numeric or logical

From “Depth-Time visualization using R, the tidyverse, and ggplot2” tutorial

Box plots

###maybe i’ll do this later? maybe not At its heart, this graphic is just a ggplot() with a geom_raster() layer. The complication is, geom_raster() requires equally spaced points in both the x- and y-direction. The rest of this post is about how to make that happen.

Linear Interpolation

I interpolate in the distance direction first, because I think this is the better assumption: as you go down further away, a reasonable way to estimate the similarity at a distance which you did not measure is to draw a straight line between the similarity at the distance that you did measure. (Not great, but sure)

estimate_jac_sim_by_year(1990, c(0,3000,300000))
collapsing to unique 'x' values
[1] 1.0000000 0.5103346 0.3037503

Expand data inputs

Visualize

First, we write a function that estimates the temperature at any date given a depth that is in temp_interp_depth (the tibble we just calculated).

estimate_jac_sim_by_distance(
  target_distance = 1000000, 
  target_year = c(1990,1998,2000)
)
[1] 0.12801222 0.06626988 0.06779441

Expand year grid

jac_sim_raster[,jaccard_similarity_modeled := estimate_jac_sim_by_distance(distance,year),by = distance]
Error in approx(data_for_distance$year, data_for_distance$jaccard_similarity_modeled,  : 
  need at least two non-NA values to interpolate

Finally, we have equally-spaced values in both the date and depth dimensions. This can be visualized using geom_raster(), with temp mapped to the fill aesthetic. I’ve again used scale_fill_gradient2() to ensure that red values represent “hot”, blue values represent “cool”, and that there is some way to visualize the depth of the thermocline. Finally, I’ve used coord_cartesian(expand = FALSE) to eliminate the white border around the outside of the raster layer…I think it looks nicer that way.

---
title: "NEUS_similarity_heatmap_distance"
output: html_notebook
---

```{r setup}
library(data.table)
library(viridis)
library(ggplot2)
library(reshape2)
library(geosphere)
library(vegan)
library(raster)

#load spring and fall data
dat_NEUS_grid.spring.reduced_3plustows <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/dat_NEUS_grid.spring.reduced_3plustows.RData")

dat_NEUS_grid.fall.reduced_3plustows <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/dat_NEUS_grid.fall.reduced_3plustows.RData")

#distance among grid cells
neus_reg_distances_fall.l <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/neus_reg_distances_fall.l.RData")

neus_reg_distances_spring.l <- readRDS("/Users/zoekitchel/Documents/grad school/Rutgers/Repositories/trawl_spatial_turnover/data/gridded/USA-NEUS/neus_reg_distances_spring.l.RData")
```
Lists of Years
```{r year lists}
#spring
dat_NEUS_grid.spring.reduced_3plustows[,year:= as.numeric(year)] #make numeric
setorder(dat_NEUS_grid.spring.reduced_3plustows, year)

neus_years_spring <- unique(dat_NEUS_grid.spring.reduced_3plustows[,year])

#fall
dat_NEUS_grid.fall.reduced_3plustows[,year:= as.numeric(year)] #make numeric
setorder(dat_NEUS_grid.fall.reduced_3plustows, year)

neus_years_fall <- unique(dat_NEUS_grid.fall.reduced_3plustows[,year])
```

Haul ID keys
```{r list of haulids}
neus_spring_haulids <- unique(dat_NEUS_grid.spring.reduced_3plustows[,haulid])
neus_spring_haulids_key <- data.table(haulid = neus_spring_haulids, key_ID = seq(1,length(neus_spring_haulids), by = 1))

neus_fall_haulids <- unique(dat_NEUS_grid.fall.reduced_3plustows[,haulid])
neus_fall_haulids_key <- data.table(haulid = neus_fall_haulids, key_ID = seq(1,length(neus_fall_haulids), by = 1))
```

Convert haulids to numeric key_IDs
```{r}
#spring
dat_NEUS_grid.spring.reduced_3plustows <- dat_NEUS_grid.spring.reduced_3plustows[neus_spring_haulids_key, on = "haulid"]

#fall
dat_NEUS_grid.fall.reduced_3plustows <- dat_NEUS_grid.fall.reduced_3plustows[neus_fall_haulids_key, on = "haulid"]
```


Dissimilarities across multiple years
SPRING
```{r dissimilarities between cells across multiple years for spring}
neus_spring_distances_dissimilarities_allyears <- data.table("haulid1" = integer(), "haulid2" = integer(), "distance" = numeric(),"bray_curtis_dissimilarity" = numeric(), year = integer(), "jaccard_dissimilarity" = numeric())

#Now loop through all years
for (i in 1:length(neus_years_spring)) {
  reduced_year <- dat_NEUS_grid.spring.reduced_3plustows[year == neus_years_spring[i],]
  
  #distances among cells
  setorder(reduced_year, key_ID)
  
  lat_lon_haulid <- unique(reduced_year[,.(lat,lon,key_ID)])
  neus_distances <- distm(lat_lon_haulid[,.(lon,lat)])
  key_IDs <- lat_lon_haulid[,key_ID]

  colnames(neus_distances) <- rownames(neus_distances) <- key_IDs

  #wide to long
  haulid_distances.l <- reshape2::melt(neus_distances,varnames = (c("haulid1", "haulid2")), value.name = "distance")
  
  #make into data table
  haulid_distances.l <- data.table(haulid_distances.l)

  
  reduced_year_wide <- dcast(reduced_year, key_ID + year ~ matched_name2, value.var = "wtcpue", fun.aggregate = sum) #long to wide data for community matrix, column names are cell then species
  
  ncols <- ncol(reduced_year_wide)
  communitymatrix <- cbind(reduced_year_wide[,3:ncols], reduced_year_wide[,1:2]) #community matrix with year and cell on far right

  #list of haulid keys
  key_IDs_subset <- communitymatrix$key_ID

  dissimilarities_abundance <- vegdist(communitymatrix[,1:(ncols-2)], method = "bray", binary = F) #dissimilarity 
  dissimilarities_occurrence <- vegdist(communitymatrix[,1:(ncols-2)], method = "jaccard", binary = T) #T binary performs presence absence standardization before using decostand

  #make into matrix
  dissimilarities_abundance.m <- as.matrix(dissimilarities_abundance, labels=TRUE)
  dissimilarities_occurrence.m <- as.matrix(dissimilarities_occurrence, labels=TRUE)
  colnames(dissimilarities_abundance.m) <- rownames(dissimilarities_abundance.m) <- key_IDs_subset
  colnames(dissimilarities_occurrence.m) <- rownames(dissimilarities_occurrence.m) <- key_IDs_subset

  #reshape dissimilarities
  dissimilarities_abundance.l <- reshape2::melt(dissimilarities_abundance.m, varnames = c("haulid1", "haulid2"), value.name = "bray_curtis_dissimilarity")
  dissimilarities_occurrence.l <- reshape2::melt(dissimilarities_occurrence.m, varnames = c("haulid1", "haulid2"), value.name = "jaccard_dissimilarity")
  dissimilarities_abundance.l <- data.table(dissimilarities_abundance.l) #and then to data table
  dissimilarities_occurrence.l <- data.table(dissimilarities_occurrence.l)

  #add year for these values
  dissimilarities_abundance.l[, "year" := neus_years_spring[i]]
  dissimilarities_occurrence.l[, "year" := neus_years_spring[i]]

  #merge distance with dissimilarity for this year with both metrics of dissimilarity
  dissimilarities_full <- haulid_distances.l[dissimilarities_abundance.l, on = c("haulid1", "haulid2")]
  dissimilarities_full <- dissimilarities_full[dissimilarities_occurrence.l, on = c("haulid1", "haulid2", "year")]


  #add to data table
  neus_spring_distances_dissimilarities_allyears <- rbind(neus_spring_distances_dissimilarities_allyears, dissimilarities_full)
  
}

summary(neus_spring_distances_dissimilarities_allyears) #here we have bray, jaccard and geographic distance

#delete repeats
neus_spring_distances_dissimilarities_allyears <- neus_spring_distances_dissimilarities_allyears[haulid1 >= haulid2,] #3165272 to 1588479 rows


neus_spring_distances_dissimilarities_allyears[,bray_curtis_similarity := (1-bray_curtis_dissimilarity)][,jaccard_similarity := (1-jaccard_dissimilarity)]

saveRDS(neus_spring_distances_dissimilarities_allyears, file = "neus_spring_distances_dissimilarities_allyears.rds")

```

#Heat map

Subsample for plotting
```{r subsample}
#jaccard only
neus_spring_distances_dissimilarities_allyears_jaccard <- neus_spring_distances_dissimilarities_allyears[,.(year,distance,jaccard_similarity)]

#new column with rounded distance
neus_spring_distances_dissimilarities_allyears_jaccard[,distance_10s := round(distance,-1)] #round to nearest 100

#new column with median jaccard similarity
neus_spring_distances_dissimilarities_allyears_jaccard[, jaccard_similarity_mean := mean(jaccard_similarity), .(year, distance_10s)]

#reduce to unique values
neus_spring_distances_dissimilarities_allyears_jaccard_rounded <- unique(neus_spring_distances_dissimilarities_allyears_jaccard, by = c("year","distance_10s"))
```


All possible combos
```{r all combos}
#all possible 10s
seq_10 <- seq(0,max(neus_spring_distances_dissimilarities_allyears_jaccard_rounded$distance_10s), by = 10)

#datatable with all possible combos
neus_spring_distances_dissimilarities_allyears_jaccard_allcombos <- data.table(expand.grid(year = neus_years_spring, distance_10s = seq_10))

#combine
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos <- neus_spring_distances_dissimilarities_allyears_jaccard_allcombos[neus_spring_distances_dissimilarities_allyears_jaccard_rounded, on = c("year","distance_10s")]
```


Trying to use ggplot
```{r}
ggplot(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos, aes(year, distance_10s, fill = jaccard_similarity_mean)) +
  geom_raster(interpolate = T) +
  theme_classic() +
  scale_fill_viridis(discrete = F)

library(sp)

```

Convert to matrix
```{r jaccard matrix}
neus_spring_distances_dissimilarities_allyears_jaccard_wide <-
data.table::dcast(neus_spring_distances_dissimilarities_allyears_jaccard_rounded[,.(year,distance_10s,jaccard_similarity_mean)], distance_10s ~ year, fill = NA)

setkey(neus_spring_distances_dissimilarities_allyears_jaccard_wide, distance_10s)

#get rid of first column
neus_spring_distances_dissimilarities_allyears_jaccard_wide.m <- neus_spring_distances_dissimilarities_allyears_jaccard_wide[,2:ncol(neus_spring_distances_dissimilarities_allyears_jaccard_wide)]

neus_spring_distances_dissimilarities_allyears_jaccard_wide.m <- as.matrix(neus_spring_distances_dissimilarities_allyears_jaccard_wide.m)

neus_spring_distances_dissimilarities_allyears_jaccard_wide.r <- as.raster (neus_spring_distances_dissimilarities_allyears_jaccard_wide.m)
```

Convert comprehensive table to matrix
```{r}
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide <- data.table::dcast(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos[,.(year,distance_10s,jaccard_similarity_mean)], distance_10s ~ year, fill = NA)

setkey(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide, distance_10s)

#get rid of first column
neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m <- neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide[,2:ncol(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide)]

neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m <- as.matrix(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m)

neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.r <- as.raster (neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.m)
```

Plot raster
```{r}
image(neus_spring_distances_dissimilarities_allyears_jaccard_rounded_allcombos_wide.r)
```
From "Depth-Time visualization using R, the tidyverse, and ggplot2" tutorial

```{r}

#jaccard similarity
ggplot(neus_spring_distances_dissimilarities_allyears,aes(year,distance, color = jaccard_similarity)) +
  geom_point() +
  scale_color_gradientn(colors =
c("purple" ,"blue"  , "green" , "yellow" ,"orange", "red")
  ) +
  labs(y = "Distance (m)", x = "Year", color = "Jaccard Similarity") +
  theme_classic()

#bray curtis similarity
ggplot(neus_spring_distances_dissimilarities_allyears,aes(year,distance, color = bray_curtis_similarity)) +
  geom_point() +
  scale_color_gradientn(colors =
c("purple" ,"blue"  , "green" , "yellow" ,"orange", "red")
  ) +
  labs(y = "Distance (m)", x = "Year", color = "Bray Curtis Similarity") +
  theme_classic()
```
Box plots
```{r}
#convert year to factor
setkey(neus_spring_distances_dissimilarities_allyears,year)
neus_spring_distances_dissimilarities_allyears[,year.f := as.factor(year)]

#jaccard similarity
ggplot(neus_spring_distances_dissimilarities_allyears,aes(year.f,jaccard_similarity)) +
  geom_boxplot(outlier.shape = NA) +
  labs(x="Year", y = "Jaccard Similarity") +
  scale_x_discrete(limits=levels(neus_spring_distances_dissimilarities_allyears$year.f)) +
  geom_smooth(method = "lm", se=FALSE, color="red", aes(group=1)) +
  ylim(c(0,0.68)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90))

#bray curtis similarity
ggplot(neus_spring_distances_dissimilarities_allyears,aes(year.f,bray_curtis_similarity)) +
  geom_boxplot(outlier.shape = NA) +
  labs(x="Year", y = "Bray Curtis Similarity") +
  scale_x_discrete(limits=levels(neus_spring_distances_dissimilarities_allyears$year.f)) +
  geom_smooth(method = "lm", se=FALSE, color="red", aes(group=1)) +
    ylim(c(0,0.48)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90))
```


###maybe i'll do this later? maybe not
At its heart, this graphic is just a ggplot() with a geom_raster() layer. The complication is, geom_raster() requires equally spaced points in both the x- and y-direction. The rest of this post is about how to make that happen.

Linear Interpolation

I interpolate in the distance direction first, because I think this is the better assumption: as you go down further away, a reasonable way to estimate the similarity at a distance which you did not measure is to draw a straight line between the similarity at the distance that you did measure. (Not great, but sure)
```{r}
estimate_jac_sim_by_year <- function(target_year, target_distance) {
  data_for_year <- neus_spring_distances_dissimilarities_allyears[year == target_year]
  
  # approx() is one way to do a linear interpolation
  approx(data_for_year$distance, data_for_year$jaccard_similarity, xout = target_distance)$y
}

estimate_jac_sim_by_year(1990, c(0,3000,300000))
```
Expand data inputs
```{r}
temp_interp_depth <- data.table(expand.grid(
  # the same years as neus spring
  year = neus_years_spring,
  # ddistances can now be any value
  distance = seq(0, max(neus_spring_distances_dissimilarities_allyears$distance), by = 10))
)

temp_interp_depth[,jaccard_similarity_modeled := estimate_jac_sim_by_year(year,distance), year]

```
Visualize
```{r}
ggplot(temp_interp_depth,aes(year,distance, color = jaccard_similarity_modeled)) +
  geom_point() +
   scale_colour_gradient2(
    midpoint = 0.5, 
    high = scales::muted("red"), 
    low = scales::muted("blue")
  )
```

First, we write a function that estimates the temperature at any date given a depth that is in temp_interp_depth (the tibble we just calculated).

```{r}
estimate_jac_sim_by_distance <- function(target_distance, target_year) {
  data_for_distance <- temp_interp_depth[distance == target_distance,]
    setkey(data_for_distance, year)
  approx(data_for_distance$year, data_for_distance$jaccard_similarity_modeled, xout = target_year)$y
}

estimate_jac_sim_by_distance(
  target_distance = 1000000, 
  target_year = c(1990,1998,2000)
)
```
Expand year grid
```{r}
jac_sim_raster <- data.table(expand.grid(
  # dates can now be any value
  year = seq(min(neus_years_spring), max(neus_years_spring), by = 1),
  # depths must be the same as in temp_interp_depth
  distance = unique(temp_interp_depth$distance))
)

jac_sim_raster[,jaccard_similarity_modeled := estimate_jac_sim_by_distance(distance,year),by = distance]


```
Finally, we have equally-spaced values in both the date and depth dimensions. This can be visualized using geom_raster(), with temp mapped to the fill aesthetic. I’ve again used scale_fill_gradient2() to ensure that red values represent “hot”, blue values represent “cool”, and that there is some way to visualize the depth of the thermocline. Finally, I’ve used coord_cartesian(expand = FALSE) to eliminate the white border around the outside of the raster layer…I think it looks nicer that way.


```{r final jaccard similarity plot}
ggplot(jac_sim_raster, aes(year, distance, fill = jaccard_similarity_modeled)) +
  geom_raster() +
  scale_fill_gradientn(colors =
c("purple" ,"blue"  , "green" , "yellow" ,"orange", "red")
  ) +
  coord_cartesian(expand = FALSE)
```

